Packages Used
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 2.1.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
if (!require("ggbeeswarm")) install.packages("ggbeeswarm")
## Loading required package: ggbeeswarm
library(ggbeeswarm)
if (!require("plotly")) install.packages("plotly")
## Loading required package: plotly
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(plotly)
Datasets Used
# Load the billionaires snapshot from the raw-data folder (relative path).
bills <- read_csv(file = "../dataraw/billionaires_2021_10_31.csv")
## Rows: 500 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Name, Total_Net_Worth, LastChange, YTDChange, Country, Industry
## dbl (4): Rank, Total_Net_Worth_Bil, LastChange_Bil, YTDChange_Bil
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print a compact per-column overview (type and first values) of the loaded data.
glimpse(bills)
## Rows: 500
## Columns: 10
## $ Rank <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,…
## $ Name <chr> "Elon Musk", "Jeff Bezos", "Bernard Arnault", "Bil…
## $ Total_Net_Worth <chr> "$311B", "$195B", "$167B", "$136B", "$131B", "$126…
## $ Total_Net_Worth_Bil <dbl> 311.0, 195.0, 167.0, 136.0, 131.0, 126.0, 121.0, 1…
## $ LastChange <chr> "+$9.32B", "-$3.79B", "-$544M", "+$906M", "+$1.71B…
## $ LastChange_Bil <dbl> 9.3200, -3.7900, -0.5440, 0.9060, 1.7100, 1.6400, …
## $ YTDChange <chr> "+$141B", "+$5.06B", "+$52.7B", "+$4.40B", "+$48.7…
## $ YTDChange_Bil <dbl> 141.00, 5.06, 52.70, 4.40, 48.70, 46.70, 17.30, 37…
## $ Country <chr> "UnitedStates", "UnitedStates", "France", "UnitedS…
## $ Industry <chr> "Technology", "Technology", "Consumer", "Technolog…
# Confirm that the loaded object is a tibble.
is_tibble(bills)
## [1] TRUE
# bills[bills == "$121B"] <- NA # Inject an NA to verify that the is.na() check below detects it.
# ###
# ### Re-run read_csv to refresh the data frame after testing
# ###
#
# for (i in 1:ncol(bills)) {
# print(which(is.na(bills[ ,i])))
# }
# ggplot(bills, aes(LastChange_Bil, Total_Net_Worth_Bil, color = Industry)) +
# geom_point() +
# scale_x_continuous(trans = "log2") +
# scale_y_continuous(trans = "log2")
# Total net worth (Total_Net_Worth_Bil) summed per Industry, one row per
# industry, using the original, un-collapsed industry labels.
bills_ex <- summarise(
  group_by(bills, Industry),
  Sum = sum(Total_Net_Worth_Bil)
)
# Derive the plotting data set from `bills`:
#   * Industry        - the five listed industries are kept as-is and every
#                       other non-missing industry is collapsed into "Other"
#                       (a missing Industry stays NA).
#   * LastChange_Prop - (LastChange_Bil + Total_Net_Worth_Bil) relative to the
#                       row's own Total_Net_Worth_Bil.
#   * LastChange_Coef - (LastChange_Bil + mean net worth) relative to the mean
#                       Total_Net_Worth_Bil across all rows.
# Columns are referenced through the data mask rather than `bills$...`, so the
# step keeps working if rows are filtered, reordered or grouped upstream.
bills_red <- bills %>%
  mutate(
    Industry = if_else(
      is.na(Industry) |
        Industry %in% c(
          "Technology", "Industrial", "Finance", "Diversified", "Consumer"
        ),
      Industry,
      "Other"
    ),
    LastChange_Prop =
      (LastChange_Bil + Total_Net_Worth_Bil) / Total_Net_Worth_Bil,
    LastChange_Coef =
      (LastChange_Bil + mean(Total_Net_Worth_Bil)) / mean(Total_Net_Worth_Bil)
  )
# Total net worth (Total_Net_Worth_Bil) summed per Industry after the
# industries have been collapsed in `bills_red`.
bills_rex <- summarise(
  group_by(bills_red, Industry),
  Sum = sum(Total_Net_Worth_Bil)
)
# Shared jitter position for the point layers of the plot below. The fixed
# seed makes the random offsets reproducible, so layers that reuse this object
# on the same data line up. `height` is left at its default (NULL).
jitter <- position_jitter(width = 0.2, seed = 2)
# ggplot(bills_red, aes(x = Industry, fill = Industry)) +
# geom_bar()
#
# ggplot(bills, aes(x = Industry, fill = Industry)) +
# geom_bar() +
# coord_flip()
#
# ggplot(bills_ex, aes(Industry, Sum, fill = Industry)) +
# stat_summary(geom="bar", position = "stack") +
# xlab("Sector") +
# coord_flip()
#
# ggplot(bills_rex, aes(Industry, Sum, fill = Industry)) +
# stat_summary(geom="bar", position = "stack") +
# xlab("Sector")
#
#
#
# glimpse(strmtv_long)
# plot(bills, aes(LastChange_Bil, Total_Net_Worth_Bil, color = Country)) +
# geom_point() +
# scale_x_continuous(n.breaks = 10) +
# scale_y_continuous(n.breaks = 10)
# ggplot(bills_red,
# aes(
# x = 1,
# y = LastChange_Prop,
# color = Industry,
# size = Total_Net_Worth_Bil
# )) +
# geom_point(position = jitter, alpha = 0.8) +
# scale_size(range = c(0, 20))
#
# ggplot(bills_red,
# aes(
# x = 1,
# y = LastChange_Coef,
# color = Industry,
# size = Total_Net_Worth_Bil
# )) +
# geom_point(position = jitter) +
# scale_size(range = c(0, 20))
# Jittered strip plot of year-to-date change: every row of `bills_red` is one
# point placed at a constant x position (spread horizontally by `jitter`),
# coloured by Industry and sized by Total_Net_Worth_Bil. `text = Name` is not
# drawn by ggplot2; it is carried along so ggplotly() can show it on hover.
p <- bills_red %>%
  ggplot(
    aes(
      x = 1,
      y = YTDChange_Bil,
      colour = Industry,
      size = Total_Net_Worth_Bil,
      text = Name
    )
  ) +
  # Filled points first ...
  geom_point(position = jitter) +
  # ... then a faint gray ring (shape 1 = open circle) on top of each point,
  # using the same seeded jitter so ring and point coincide.
  geom_point(position = jitter, shape = 1, colour = "gray50", alpha = 0.2) +
  scale_size(range = c(0, 20)) +
  scale_y_continuous(n.breaks = 11)

# Render as an interactive plotly widget; the hover tooltip shows only the
# `text` (Name) and `size` (Total_Net_Worth_Bil) aesthetics.
ggplotly(p, tooltip = c("text", "size"))
# Record the R version, platform and attached/loaded package versions used for this run.
sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: Red Hat Enterprise Linux
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/R/lib/libRblas.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] plotly_4.10.0 ggbeeswarm_0.6.0 forcats_0.5.1 stringr_1.4.0
## [5] dplyr_1.0.7 purrr_0.3.4 readr_2.1.0 tidyr_1.1.3
## [9] tibble_3.1.6 ggplot2_3.3.5 tidyverse_1.3.1
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.7 lubridate_1.8.0 assertthat_0.2.1 digest_0.6.28
## [5] utf8_1.2.2 R6_2.5.1 cellranger_1.1.0 backports_1.4.0
## [9] reprex_2.0.1 evaluate_0.14 httr_1.4.2 pillar_1.6.4
## [13] rlang_0.4.12 lazyeval_0.2.2 readxl_1.3.1 rstudioapi_0.13
## [17] data.table_1.14.2 jquerylib_0.1.4 rmarkdown_2.11 labeling_0.4.2
## [21] htmlwidgets_1.5.4 bit_4.0.4 munsell_0.5.0 broom_0.7.10
## [25] compiler_3.6.0 vipor_0.4.5 modelr_0.1.8 xfun_0.28
## [29] pkgconfig_2.0.3 htmltools_0.5.2 tidyselect_1.1.1 viridisLite_0.4.0
## [33] fansi_0.5.0 crayon_1.4.2 tzdb_0.2.0 dbplyr_2.1.1
## [37] withr_2.4.2 grid_3.6.0 jsonlite_1.7.2 gtable_0.3.0
## [41] lifecycle_1.0.1 DBI_1.1.1 magrittr_2.0.1 scales_1.1.1
## [45] vroom_1.5.6 cli_3.1.0 stringi_1.7.5 farver_2.1.0
## [49] fs_1.5.0 xml2_1.3.2 bslib_0.3.1 ellipsis_0.3.2
## [53] generics_0.1.1 vctrs_0.3.8 tools_3.6.0 bit64_4.0.5
## [57] glue_1.5.0 beeswarm_0.4.0 crosstalk_1.2.0 hms_1.1.1
## [61] parallel_3.6.0 fastmap_1.1.0 yaml_2.2.1 colorspace_2.0-2
## [65] rvest_1.0.2 knitr_1.33 haven_2.4.3 sass_0.4.0